{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "5e0768d5",
   "metadata": {},
   "source": [
    "<a href=\"https://colab.research.google.com/github/jerryjliu/llama_index/blob/main/docs/examples/query_transformations/SimpleIndexDemo-multistep.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "9c48213d-6e6a-4c10-838a-2a7c710c3a05",
   "metadata": {},
   "source": [
    "# Multi-Step Query Engine\n",
    "\n",
    "We have a multi-step query engine that's able to decompose a complex query into sequential subquestions. This\n",
    "guide walks you through how to set it up!"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cc77f986",
   "metadata": {},
   "source": [
    "If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "20a71098",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install llama-index-llms-openai"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1321fe6f",
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install llama-index"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c5885d63",
   "metadata": {},
   "source": [
    "#### Download Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ee4ace96",
   "metadata": {},
   "outputs": [],
   "source": [
    "!mkdir -p 'data/paul_graham/'\n",
    "!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "50d3b817-b70e-4667-be4f-d3a0fe4bd119",
   "metadata": {},
   "source": [
    "#### Load documents, build the VectorStoreIndex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "65ebfca2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"sk-...\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "690a6918-7c75-4f95-9ccc-d2c4a1fe00d7",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n",
    "from llama_index.llms.openai import OpenAI\n",
    "from IPython.display import Markdown, display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c48da73f-aadb-480c-8db1-99c915b7cc1c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# LLM (gpt-3.5)\n",
    "gpt35 = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
    "\n",
    "# LLM (gpt-4)\n",
    "gpt4 = OpenAI(temperature=0, model=\"gpt-4\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "03d1691e-544b-454f-825b-5ee12f7faa8a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# load documents\n",
    "documents = SimpleDirectoryReader(\"./data/paul_graham/\").load_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ad144ee7-96da-4dd6-be00-fd6cf0c78e58",
   "metadata": {},
   "outputs": [],
   "source": [
    "index = VectorStoreIndex.from_documents(documents)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "b6caf93b-6345-4c65-a346-a95b0f1746c4",
   "metadata": {},
   "source": [
    "#### Query Index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "95d989ba-0c1d-43b6-a1d3-0ea7135f43a6",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core.indices.query.query_transform.base import (\n",
    "    StepDecomposeQueryTransform,\n",
    ")\n",
    "\n",
    "# gpt-4\n",
    "step_decompose_transform = StepDecomposeQueryTransform(llm=gpt4, verbose=True)\n",
    "\n",
    "# gpt-3\n",
    "step_decompose_transform_gpt3 = StepDecomposeQueryTransform(\n",
    "    llm=gpt35, verbose=True\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2a124db0-e2d7-4566-bcec-1d41cf669ff4",
   "metadata": {},
   "outputs": [],
   "source": [
    "index_summary = \"Used to answer questions about the author\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "85466fdf-93f3-4cb1-a5f9-0056a8245a6f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[1;3;33m> Current query: Who was in the first batch of the accelerator program the author started?\n",
      "\u001b[0m\u001b[1;3;38;5;200m> New query: Who is the author of the accelerator program?\n",
      "\u001b[0m\u001b[1;3;33m> Current query: Who was in the first batch of the accelerator program the author started?\n",
      "\u001b[0m\u001b[1;3;38;5;200m> New query: Who was in the first batch of the accelerator program started by Paul Graham?\n",
      "\u001b[0m\u001b[1;3;33m> Current query: Who was in the first batch of the accelerator program the author started?\n",
      "\u001b[0m\u001b[1;3;38;5;200m> New query: None\n",
      "\u001b[0m"
     ]
    }
   ],
   "source": [
    "# set Logging to DEBUG for more detailed outputs\n",
    "from llama_index.core.query_engine import MultiStepQueryEngine\n",
    "\n",
    "query_engine = index.as_query_engine(llm=gpt4)\n",
    "query_engine = MultiStepQueryEngine(\n",
    "    query_engine=query_engine,\n",
    "    query_transform=step_decompose_transform,\n",
    "    index_summary=index_summary,\n",
    ")\n",
    "response_gpt4 = query_engine.query(\n",
    "    \"Who was in the first batch of the accelerator program the author\"\n",
    "    \" started?\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bdda1b2c-ae46-47cf-91d7-3153e8d0473b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<b>In the first batch of the accelerator program started by the author, the participants included the founders of Reddit, Justin Kan and Emmett Shear who later founded Twitch, Aaron Swartz who had helped write the RSS spec and later became a martyr for open access, and Sam Altman who later became the second president of YC.</b>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(Markdown(f\"<b>{response_gpt4}</b>\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1c9670bd-729d-478b-a77c-c6e13c282456",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('Who is the author of the accelerator program?', 'The author of the accelerator program is Paul Graham.'), ('Who was in the first batch of the accelerator program started by Paul Graham?', 'The first batch of the accelerator program started by Paul Graham included the founders of Reddit, Justin Kan and Emmett Shear who later founded Twitch, Aaron Swartz who had helped write the RSS spec and later became a martyr for open access, and Sam Altman who later became the second president of YC.')]\n"
     ]
    }
   ],
   "source": [
    "sub_qa = response_gpt4.metadata[\"sub_qa\"]\n",
    "tuples = [(t[0], t[1].response) for t in sub_qa]\n",
    "print(tuples)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ec88df57",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[1;3;33m> Current query: In which city did the author found his first company, Viaweb?\n",
      "\u001b[0m\u001b[1;3;38;5;200m> New query: Who is the author who founded Viaweb?\n",
      "\u001b[0m\u001b[1;3;33m> Current query: In which city did the author found his first company, Viaweb?\n",
      "\u001b[0m\u001b[1;3;38;5;200m> New query: In which city did Paul Graham found his first company, Viaweb?\n",
      "\u001b[0m\u001b[1;3;33m> Current query: In which city did the author found his first company, Viaweb?\n",
      "\u001b[0m\u001b[1;3;38;5;200m> New query: None\n",
      "\u001b[0m"
     ]
    }
   ],
   "source": [
    "response_gpt4 = query_engine.query(\n",
    "    \"In which city did the author found his first company, Viaweb?\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "653508f1-b2b0-479a-85b3-113cda507231",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The author founded his first company, Viaweb, in Cambridge.\n"
     ]
    }
   ],
   "source": [
    "print(response_gpt4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9fa93cdb-7007-4664-853a-5c81c6c17560",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[1;3;33m> Current query: In which city did the author found his first company, Viaweb?\n",
      "\u001b[0m\u001b[1;3;38;5;200m> New query: None\n",
      "\u001b[0m"
     ]
    }
   ],
   "source": [
    "query_engine = index.as_query_engine(llm=gpt35)\n",
    "query_engine = MultiStepQueryEngine(\n",
    "    query_engine=query_engine,\n",
    "    query_transform=step_decompose_transform_gpt3,\n",
    "    index_summary=index_summary,\n",
    ")\n",
    "\n",
    "response_gpt3 = query_engine.query(\n",
    "    \"In which city did the author found his first company, Viaweb?\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "05899fcf-7a04-4d21-9e6d-04983755d175",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Empty Response\n"
     ]
    }
   ],
   "source": [
    "print(response_gpt3)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llama",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
